package com.bigdata.spark.core.rdd.operator.transform

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Demonstrates the `groupBy` transformation on an RDD.
 *
 * Builds an RDD of four words split across two slices, groups the words by
 * their first character, and prints every resulting group.
 */
object RDD_Operator_Transform06_Test1 {
  /**
   * Runs the `groupBy` example against a local Spark master.
   *
   * @param args command-line arguments; not read by this program
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator")
    val sc = new SparkContext(sparkConf)

    try {
      // TODO operator - groupBy
      val rdd = sc.makeRDD(List("Hello", "Spark", "Scala", "Hadoop"), 2)

      // Grouping and partitioning are not necessarily related.
      // groupBy scatters the data and then regroups it; this process is
      // called a shuffle.
      val groupRDD = rdd.groupBy(_.charAt(0))

      groupRDD.collect().foreach(println)
    } finally {
      // Stop the context even when the job above throws, so it is not leaked.
      sc.stop()
    }
  }
}
